from sklearn.metrics import r2_score, mean_squared_error
def adj_r2(y_true, y_pred, d):
    """Return the adjusted R² of a set of predictions.

    Computed as ``1 - (n - 1) / (n - d - 1) * (1 - R²)``, where ``n`` is
    ``len(y_true)`` and R² comes from ``r2_score(y_true, y_pred)``.

    Args:
        y_true: Observed target values; must support ``len()``.
        y_pred: Predicted target values, passed to ``r2_score`` with
            ``y_true``.
        d: Number of predictors used by the model that produced ``y_pred``.

    Returns:
        The adjusted R² score.

    Raises:
        ValueError: If ``n - d - 1`` is not positive, in which case the
            adjustment is undefined.
    """
    n = len(y_true)
    dof = n - d - 1
    # Validate before dividing: a zero denominator would raise a bare
    # ZeroDivisionError (or yield inf/nan for NumPy integers), and a negative
    # one would silently produce a meaningless score.
    if dof <= 0:
        raise ValueError(
            f"adjusted R2 is undefined for n={n} samples and d={d} "
            "predictors; n - d - 1 must be positive"
        )
    penalty = (n - 1) / dof
    return 1 - penalty * (1 - r2_score(y_true, y_pred))
# Dummy-encode `origin`, dropping the first level as the baseline; multiplying
# by 1 makes sure the indicator columns are numeric rather than boolean.
cat = pd.get_dummies(data['origin'].astype(object), drop_first=True) * 1
# Assigning two labels requires exactly two dummy columns to remain.
cat.columns = ['orig2', 'orig3']

# Full design matrix: the origin dummies followed by every column of `data`
# other than 'mpg', 'car name' and the raw 'origin'.
X_full = pd.concat(
    [cat, data.drop(columns=['mpg', 'car name', 'origin'])],
    axis=1,
)
# NOTE(review): the target is read from `df` while the predictors come from
# `data` — confirm both frames hold the same rows in the same order.
y_full = df['mpg']

# Reduced designs: one predictor (LR1) and two predictors (LR2).
X1 = data[['weight']]
X2 = data[['model year', 'cylinders']]

# Fit the three linear models on the same target.
lm_full = LinearRegression()
lm_full.fit(X_full, y_full)
lm2 = LinearRegression()
lm2.fit(X2, y_full)
lm1 = LinearRegression()
lm1.fit(X1, y_full)

# In-sample predictions, computed once per model and reused for both metrics.
pred_full = lm_full.predict(X_full)
pred_lm1 = lm1.predict(X1)
pred_lm2 = lm2.predict(X2)

# Each entry pairs a model's predictions with its number of predictors.
scored = [
    (pred_lm1, 1),
    (pred_lm2, 2),
    (pred_full, X_full.shape[1]),
]

# Comparison table of R2 and adjusted R2, one row per model.
df_r2 = pd.DataFrame(
    {
        'R2': [r2_score(y_full, pred) for pred, _ in scored],
        'Adj-R2': [adj_r2(y_full, pred, k) for pred, k in scored],
    },
    index=['LR1', 'LR2', 'LR-full'],
)
df_r2